from google.colab import drive
from google.colab import files
drive.mount('/content/drive', force_remount = True)
!pip install geopandas
!pip install descartes
!pip install geos
!pip install shapely
!apt install python3-rtree
!pip install geos
!pip install ptitprince
!pip install mpld3
import rtree
import descartes
import plotly.express as px
import pandas as pd
import numpy as np
import altair as alt
import vega_datasets as vega_data
import json
import geopandas as gpd
import regex as re
import shapely
import geos
import seaborn as sns
import os
import matplotlib.pyplot as plt
#sns.set(style="darkgrid")
#sns.set(style="whitegrid")
#sns.set_style("white")
sns.set(style="whitegrid",font_scale=2)
import matplotlib.collections as clt
import mpld3
import ptitprince as pt
cd 'drive/My Drive/data_viz'
# ---- Altair theme: shared styling constants -------------------------------
markColor = 'black'
axisColor = 'black'
backgroundColor = 'grey'
font = 'Gotham'
labelFont = 'Gotham'
sourceFont = 'Gotham'
gridColor = 'black'
titleFontSize = 20
subtitleFontSize = 15
# holc_scheme: one color per HOLC grade (A..D); redlining_pal: sequential
# yellow->red ramp.
holc_scheme = ["#86B404", "#045FB4", "#F3F781", "#DF3A01"]
redlining_pal = ["#FFFFB2", "#FECC5C", "#FD8D3C", "#E31A1C"]

def vedika_theme():
    """Custom Altair theme: 500x500 charts, Gotham fonts, no gridlines.

    Returns the Vega-Lite ``config`` dict that ``alt.themes.register``
    expects from a theme callable.
    """
    return {
        'config': {
            # BUGFIX: the original config listed 'view' twice; Python keeps
            # only the last duplicate key, so height/width 500 were silently
            # dropped. Merge all view settings into one dict. strokeWidth 0
            # removes the stroke Altair draws around the data area.
            'view': {
                'height': 500,
                'width': 500,
                'strokeWidth': 0,
            },
            "title": {
                "anchor": 'start',
                "fontSize": titleFontSize,
                "font": font,
                "subtitleFont": font,
                "subtitleFontSize": subtitleFontSize,
                "titlePadding": 20,
            },
            "axisX": {
                "domain": True,
                "domainColor": axisColor,
                "domainWidth": 1,
                "grid": False,
                "labelFontSize": 12,
                "labelFont": labelFont,
                "labelAngle": 0,
                "tickColor": axisColor,
                "tickSize": 5,
                "titleFontSize": 12,
                "titlePadding": 10,
                "titleFont": font,
            },
            "axisY": {
                "domain": False,
                "domainWidth": 1,
                # BUGFIX: "grid" appeared twice (True, then False); the last
                # value won, so the effective setting was False. State it once.
                "grid": False,
                # Kept so gridlines are styled consistently if re-enabled.
                "gridColor": gridColor,
                "gridWidth": 1,
                "labelFontSize": 12,
                "labelFont": labelFont,
                "labelPadding": 8,
                "ticks": False,
                "titleFontSize": 12,
                "titlePadding": 10,
                "titleFont": font,
            },
            "legend": {
                "labelFont": labelFont,
                "labelFontSize": 12,
                "symbolType": "circle",
                "symbolSize": 200,
                "titleFont": font,
                "titleFontSize": 12,
                "orient": "right",  # right next to the y-axis
                "offset": 20,       # literally right next to the y-axis
            },
            "range": {
                "category": {"scheme": 'tableau10'},
                "diverging": {"scheme": 'yelloworangered'},
                # NOTE(review): 'diverging-colors' is not a built-in Vega
                # scheme name -- confirm this renders as intended.
                "heatmap": {"scheme": 'diverging-colors'},
                # BUGFIX: the original passed the *string* 'holc_scheme' as a
                # scheme name, which Vega-Lite does not know; pass the actual
                # list of colors for the ordinal range instead.
                "ordinal": holc_scheme,
            },
        },
    }
# Register the custom theme under a chosen name, then enable it so every
# chart built after this point picks up vedika_theme()'s config.
alt.themes.register('vedika_theme', vedika_theme)
# enable the newly registered theme
alt.themes.enable('vedika_theme')
def open_geojson(geo_json_file_loc):
    """Read a GeoJSON file from disk and return it as a parsed dict."""
    with open(geo_json_file_loc) as fh:
        return json.load(fh)
def get_gpd_df(file_location, use_shape_file=False):
    """Load a spatial file into a GeoDataFrame.

    With ``use_shape_file=True`` the path is handed straight to
    ``gpd.read_file`` (shapefile etc.); otherwise the file is parsed as
    GeoJSON and built from its feature list.
    """
    if use_shape_file:
        return gpd.read_file(file_location)
    geojson = open_geojson(file_location)
    return gpd.GeoDataFrame.from_features(geojson)
def fix_census_tract_num(df):
    """Add a cleaned ``census_tract_number`` column derived from ``GEO.id2``.

    ``GEO.id2`` is an 11-digit FIPS geo id whose last six digits encode the
    tract as NNNN.NN. The suffix ".00" and any leading zeros are stripped so
    the result matches the tract keys used by the HMDA/boundary data
    (e.g. '...030100' -> '301', '...835902' -> '8359.02').

    Returns the same DataFrame (mutated in place) for call chaining.
    """
    tract = df['GEO.id2'].str[-6:-2] + "." + df['GEO.id2'].str[-2:]
    # Raw strings avoid invalid-escape-sequence warnings; plain assignment
    # replaces the deprecated chained `inplace=True` Series.replace calls.
    # NOTE(review): the r'\.0$' pattern cannot match the NNNN.NN form built
    # above (the dot is never the second-to-last char); kept for parity with
    # the original behavior.
    tract = tract.replace(r'\.0$', '', regex=True)
    tract = tract.replace(r'\.00$', '', regex=True)
    df['census_tract_number'] = tract.str.lstrip('0')
    return df
# Paths to the Chicago boundary GeoJSON files (census tracts, community areas).
geo_json_file_loc = "data/raw/Boundaries - Census Tracts - 2010.geojson"
geo_json_community_areas = "data/raw/Boundaries - Community Areas (current).geojson"
#load data
# HMDA first-lien, owner-occupied, 1-4-family mortgage records for Illinois.
hmda_2007 = pd.read_csv("data/raw/hmda_2007_il_first-lien-owner-occupied-1-4-family-records_labels.csv")
hmda_2017 = pd.read_csv("data/raw/hmda_2017_il_first-lien-owner-occupied-1-4-family-records_labels.csv")
# NOTE(review): chi_census_tracts is re-assigned from the tract GeoJSON later
# in the file, so this CSV load is effectively unused.
chi_census_tracts = pd.read_csv("data/raw/CensusTractsTIGER2010.csv")
chi_community_areas = pd.read_csv("data/raw/Census_Data_-_Selected_socioeconomic_indicators_in_Chicago__2008___2012.csv")
chi_census_geojson = "data/raw/Boundaries - Census Tracts - 2010.geojson"
comm_area_gpd = get_gpd_df(geo_json_community_areas)
# ACS 2017 5-year DP05 demographics. [1:] drops the first data row --
# presumably the human-readable column-description row ACS exports carry;
# confirm against the raw file.
acs_demos_2017 = pd.read_csv("data/raw/ACS_17_5YR_DP05_with_ann.csv")[1:]
# Map opaque ACS DP05 field codes to readable population columns.
acs_demos_2017['tot_pop_2017'] = acs_demos_2017['HC01_VC03'].astype("int")
acs_demos_2017['white_pop_2017'] = acs_demos_2017['HC01_VC99'].astype("int")
acs_demos_2017['black_pop_2017'] = acs_demos_2017["HC01_VC100"].astype("int")
acs_demos_2017['asian_pop_2017'] = acs_demos_2017['HC01_VC102'].astype("int")
acs_demos_2017['AIAN_pop_2017'] = acs_demos_2017['HC01_VC101'].astype("int")
acs_demos_2017['not_hispanic'] = acs_demos_2017["HC01_VC98"].astype("int")
# Hispanic population derived as total minus not-Hispanic.
acs_demos_2017['hispanic_pop_2017'] = acs_demos_2017['tot_pop_2017'] - acs_demos_2017['not_hispanic']
acs_demos_2017 = fix_census_tract_num(acs_demos_2017)
# People of color defined here as everyone not counted as white.
acs_demos_2017['poc_pop_2017'] = acs_demos_2017['tot_pop_2017'] - acs_demos_2017['white_pop_2017']
acs_demos_2017['poc_perc_2017'] = acs_demos_2017['poc_pop_2017']/acs_demos_2017['tot_pop_2017']
acs_demos_2017 = acs_demos_2017[["census_tract_number","poc_perc_2017", "poc_pop_2017", "tot_pop_2017", 'white_pop_2017', 'black_pop_2017', 'AIAN_pop_2017', 'asian_pop_2017', 'hispanic_pop_2017']]
# Alternative POC measure: sum of the named race groups plus Hispanic.
acs_demos_2017['poc_alt'] = acs_demos_2017['black_pop_2017'] + acs_demos_2017['asian_pop_2017'] + acs_demos_2017['AIAN_pop_2017'] + acs_demos_2017['hispanic_pop_2017']
acs_demos_2017['poc_perc_alt'] = acs_demos_2017['poc_alt'] / acs_demos_2017['tot_pop_2017']
# ACS 2010 5-year DP05 demographics; same structure as the 2017 pull, but the
# DP05 field codes differ between ACS releases.
acs_demos_2010 = pd.read_csv("data/raw/ACS_10_5YR_DP05_with_ann.csv")[1:]
acs_demos_2010 = fix_census_tract_num(acs_demos_2010)
acs_demos_2010['tot_pop_2010'] = acs_demos_2010['HC01_VC03'].astype('int')
acs_demos_2010['white_pop_2010'] = acs_demos_2010['HC01_VC88'].astype('int')
acs_demos_2010['black_pop_2010'] = acs_demos_2010['HC01_VC89'].astype('int')
acs_demos_2010['AIAN_pop_2010'] = acs_demos_2010['HC01_VC90'].astype('int')
acs_demos_2010['asian_pop_2010'] = acs_demos_2010['HC01_VC91'].astype('int')
acs_demos_2010['not_hispanic'] = acs_demos_2010['HC01_VC87'].astype('int')
acs_demos_2010['hispanic_pop_2010'] = acs_demos_2010['tot_pop_2010'] - acs_demos_2010['not_hispanic']
# POC count/share are computed, but only the raw race counts survive the
# column subset below.
acs_demos_2010['poc_pop_2010'] = acs_demos_2010['tot_pop_2010'] - acs_demos_2010['white_pop_2010']
acs_demos_2010['poc_perc_2010'] = acs_demos_2010['poc_pop_2010']/acs_demos_2010['tot_pop_2010']
acs_demos_2010 = acs_demos_2010[["census_tract_number", "tot_pop_2010", 'white_pop_2010', 'black_pop_2010', 'AIAN_pop_2010', 'asian_pop_2010', 'hispanic_pop_2010']]
#download 2017 median income data
acs_median_income_2017 = pd.read_csv("data/raw/ACS_17_5YR_S1903_with_ann.csv")[1:]
acs_median_income_2017 = fix_census_tract_num(acs_median_income_2017)
acs_median_income_2017['median_income_tract_2017'] = acs_median_income_2017['HC03_EST_VC02']
acs_median_income_2017 = acs_median_income_2017[['census_tract_number', 'median_income_tract_2017']]
# Decennial 2000 census (SF1 QT-P3) race/ethnicity counts.
census_2000 = pd.read_csv('data/raw/DEC_00_SF1_QTP3_with_ann.csv')[1:]
census_2000 = fix_census_tract_num(census_2000)
census_2000['total'] = census_2000["HC01_VC02"].astype('int')
census_2000['hispanic'] = census_2000["HC01_VC34"].astype('int')
census_2000['black'] = census_2000["HC01_VC05"].astype('int')
census_2000['white'] = census_2000["HC01_VC04"].astype('int')
census_2000['asian'] = census_2000["HC01_VC11"].astype('int')
census_2000['AIAN'] = census_2000["HC01_VC20"].astype('int')
# Hand-picked group of South Side tracts, summed into a single row.
census_2000_south = census_2000[census_2000['census_tract_number'].isin(["8140", "3302", "3301"])]
census_2000_south = census_2000_south[['total', 'hispanic', 'black','white','asian', 'AIAN' ]]
# 'ind' is a constant group key so groupby().sum() collapses all selected
# tracts into one summed row.
# NOTE(review): census_2000_south is a filtered slice; this assignment may
# raise SettingWithCopyWarning.
census_2000_south['ind'] = 1
census_2000_south = census_2000_south.groupby('ind').sum().reset_index()
# Second hand-picked tract group (left unaggregated).
census_2000_wp = census_2000[census_2000['census_tract_number'].isin(["8345", "8361", "4003", "4004", "4005", "4008"])]
#census tract map prep
geo_json_file_loc = "data/raw/Boundaries - Census Tracts - 2010.geojson"
census_df = get_gpd_df(chi_census_geojson)
#census_df = gpd.GeoDataFrame.from_features((census_json)) - I think I can get rid of this.
#save geojson as json
# Also keep the raw parsed GeoJSON dict around as census_json.
with open(geo_json_file_loc) as json_data:
    census_json = json.load(json_data)
# Rename the tract-id property and force it to string so it joins cleanly
# with the HMDA/ACS tract keys. NOTE: this re-binds chi_census_tracts,
# replacing the CSV loaded earlier in the file.
chi_census_tracts = census_df.rename({'name10' : 'census_tract_number'}, axis='columns')
chi_census_tracts['census_tract_number'] = chi_census_tracts['census_tract_number'].astype('str')
#Create hmda dataset
# Stack the 2007 and 2017 HMDA records into one frame.
# BUGFIX/modernization: DataFrame.append was deprecated and removed in
# pandas 2.0; pd.concat is the supported equivalent (same row order and
# index behavior as the original append call).
hmda_all = pd.concat([hmda_2007, hmda_2017])
# Keep only home-purchase loans in Cook County.
hmda_all = hmda_all[hmda_all['loan_purpose_name'] == 'Home purchase']
hmda_all['census_tract_number'] = hmda_all['census_tract_number'].astype('str')
hmda_all = hmda_all[hmda_all['county_name'] == "Cook County"]
#if census tract ends in .0, get rid of .0
# Raw string avoids the invalid-escape warning; plain assignment replaces the
# deprecated chained inplace replace on a column.
hmda_all['census_tract_number'] = hmda_all['census_tract_number'].replace(r'\.0$', '', regex=True)
#add race and ethnicity data
# Ethnicity takes precedence: any Hispanic/Latino applicant is labeled
# 'Latino' regardless of the recorded race.
hmda_all['race_ethnicity'] = np.where(hmda_all['applicant_ethnicity_name'] == 'Hispanic or Latino', 'Latino', hmda_all['applicant_race_name_1'])
# Collapse the two non-response categories into a single 'NA' bucket.
hmda_all.loc[hmda_all['race_ethnicity'] == 'Information not provided by applicant in mail, Internet, or telephone application', 'race_ethnicity'] = 'NA'
hmda_all.loc[hmda_all['race_ethnicity'] == 'Not applicable', 'race_ethnicity'] = 'NA'
#population by census tract
# HMDA repeats the tract population on every record, so the mean presumably
# recovers the single per-tract value -- confirm the column is constant
# within a tract-year.
pop_over_time = hmda_all.groupby(['census_tract_number', "as_of_year"]).agg({'population' : 'mean'}).reset_index()
pop_over_time['year'] = np.where(pop_over_time['as_of_year'] == 2007, "year_2007", "year_2017")
# Pivot to wide: one row per tract with pop_2007 / pop_2017 columns.
pop_over_time = pop_over_time.pivot(index='census_tract_number', columns='year', values='population').reset_index()
pop_over_time = pop_over_time.rename({"year_2007": "pop_2007", "year_2017": "pop_2017"}, axis="columns")
# Same reshape for the minority-population share; the source column is a
# percentage, so divide by 100 to get a 0-1 fraction.
pop_minority = hmda_all.groupby(['census_tract_number', "as_of_year"]).agg({'minority_population' : 'mean'}).reset_index()
pop_minority['minority_population'] = pop_minority['minority_population']/100
pop_minority['year'] = np.where(pop_minority['as_of_year'] == 2007, "year_2007", "year_2017")
pop_minority = pop_minority.pivot(index='census_tract_number', columns='year', values='minority_population').reset_index()
pop_minority = pop_minority.rename({"year_2007": "pop_min_2007", "year_2017": "pop_min_2017"}, axis="columns")
# Aggregate HMDA Data by census tract, and clean
## count by census tract
# Loans per tract per year: record count (via respondent_id) plus the mean
# reported tract population.
chi_census_loan_counts = hmda_all.groupby(['census_tract_number', "as_of_year"]).agg({'respondent_id': 'count',
                                                                                      'population': 'mean'}).reset_index()
chi_census_loan_counts['year'] = np.where(chi_census_loan_counts['as_of_year'] == 2007, "year_2007", "year_2017")
# Pivot to wide: one row per tract, one loan-count column per year.
chi_census_loan_counts = chi_census_loan_counts.pivot(index='census_tract_number', columns='year', values='respondent_id').reset_index()
chi_census_loan_counts = chi_census_loan_counts[chi_census_loan_counts['census_tract_number'] != 'nan']
# Tracts with no loans in a year come out of the pivot as NaN; treat as 0.
# BUGFIX/modernization: chained `fillna(..., inplace=True)` on a column is
# deprecated under pandas copy-on-write; assign the result instead.
chi_census_loan_counts['year_2007'] = chi_census_loan_counts['year_2007'].fillna(0)
chi_census_loan_counts['year_2017'] = chi_census_loan_counts['year_2017'].fillna(0)
chi_census_loan_counts = chi_census_loan_counts.rename({"year_2007": "loans_2007", "year_2017": "loans_2017"}, axis="columns")
#merge census level data
# Attach loan counts, populations, and minority shares to the tract geometries.
# BUGFIX: `indicator` was the *string* "True", which names the merge-indicator
# column literally "True"; the boolean True produces the standard "_merge"
# column.
census_to_counts = chi_census_tracts.merge(chi_census_loan_counts, on="census_tract_number", how="left", indicator=True)
census_to_counts = census_to_counts.merge(pop_over_time, on="census_tract_number", how="left")
census_to_counts = census_to_counts.merge(pop_minority, on="census_tract_number", how="left")
## add some loan measures
# Normalize loan counts by tract population (loans per 100 residents).
census_to_counts['loans_per_100_ppl_2017'] = (census_to_counts['loans_2017']/census_to_counts['pop_2017'])*100
census_to_counts['loans_per_100_ppl_2007'] = (census_to_counts['loans_2007']/census_to_counts['pop_2007'])*100
#merge on acs data
# Named indicator columns record each merge's match status for later QA.
census_to_counts = census_to_counts.merge(acs_demos_2010, on='census_tract_number', indicator='merge_2010', how='left')
census_to_counts = census_to_counts.merge(acs_demos_2017, on='census_tract_number', indicator='merge_2017', how='left')
census_to_counts_map = census_to_counts.merge(acs_median_income_2017, on='census_tract_number', indicator='merge_income_2017', how='left')
# Population change between the 2010 and 2017 ACS pulls.
census_to_counts_map['change_in_pop'] = census_to_counts_map['tot_pop_2017'] - census_to_counts_map['tot_pop_2010']
census_to_counts_map['change_in_black_pop'] = census_to_counts_map['black_pop_2017'] - census_to_counts_map['black_pop_2010']
# Serialize the GeoDataFrame to GeoJSON features for Altair.
count_json = json.loads(census_to_counts_map.to_json())
count_data = alt.Data(values=count_json['features'])
For the majority of the 20th century, financial institutions denied communities of color, especially Black communities, affordable home loans through a practice called redlining. The Federal government sanctioned and supported this systematic denial of home mortgages starting in the 1930s, when it created the Home Owners Loan Corporation (HOLC) to insure private mortgages. The HOLC created maps of major American cities to document the stability and the risks associated with lending by neighborhood.
Loan officers, appraisers and real estate professionals used the HOLC maps to decide where to provide loans and the terms of the loans. The HOLC maps documented Chicago neighborhoods in great detail, labeling each neighborhood on a riskiness scale from “A”, “best”, to “D”, “hazardous”, and providing a qualitative description of each neighborhood. The grading and descriptions were based largely on the racial and ethnic make-up, changing demographics, housing conditions, and access to public facilities in each neighborhood. Neighborhoods with high percentages of people of color, especially Black residents, were deemed hazardous, regardless of other factors.
These HOLC maps in Chicago not only provided guidance to real estate professionals, but also reflected how various industries essentially isolated Black communities in areas with lower investments than their white counterparts. The systematic disinvestment in communities of color in Chicago prevented them from building wealth through homeownership and accessing products and services to thrive.
The broad practice of redlining shaped how Chicago formed, robbing Black and Brown communities of the opportunity to build intergenerational wealth through homeownership. Its legacy lives on, though not exactly how we would expect. Formerly redlined communities have changed considerably in Chicago, in ways that are important to understand when creating policies to address the racial wealth gap today. Democratic presidential candidates Elizabeth Warren and Pete Buttigieg have plans to address the impacts of redlining, and both offer benefits to residents currently living in formerly redlined areas. In Chicago, these plans could miss the people most impacted by redlining practices.
Redlined Communities in 1940 and Lending Patterns Today
There’s a popular adage in the advocacy and data viz communities in Chicago - “All maps of the city look the same.” The saying unfortunately refers to the fact that many socioeconomic indicators related to poverty, crime, and police misconduct are clustered in the South and West parts of Chicago, where Black and Brown communities live today. However, the HOLC map of Chicago created in 1940 looks quite different.
#clean redlining data
# Load the 1940 HOLC map and encode letter grades as integers 1-4. Any grade
# other than A/B/C (including missing) falls through to 4.
chi_1940 = get_gpd_df("data/raw/ILChicago1940.geojson")
_grades = chi_1940["holc_grade"]
chi_1940["holc_grade_n"] = np.select(
    [_grades == "A", _grades == "B", _grades == "C"],
    [1, 2, 3],
    default=4,
)
#merge redlining data with census data
# Represent each tract by its centroid so every tract is assigned to the one
# HOLC polygon containing its center (avoids double-counting tracts that
# straddle HOLC boundaries).
# NOTE(review): .centroid on a geographic (lat/lon) CRS is only approximate;
# confirm the geometries are projected if precision matters.
census_to_counts_map['point_centroid'] = census_to_counts_map['geometry'].centroid
census_centroids = census_to_counts_map.drop(columns="geometry", axis=1).rename({"point_centroid": "geometry"}, axis=1)
# BUGFIX: the `op=` keyword was deprecated in geopandas 0.10 and removed in
# 1.0 (this notebook pip-installs the latest geopandas); `predicate=` is the
# supported spelling for the same spatial test.
census_w_holc = gpd.sjoin(census_centroids, chi_1940, how="left", predicate='intersects')
# Tracts outside every HOLC polygon get grade 0 / "NA".
census_w_holc["holc_grade_n"] = census_w_holc["holc_grade_n"].fillna(value=0)
census_w_holc["holc_grade"] = np.where(census_w_holc["holc_grade_n"] == 0, "NA", census_w_holc["holc_grade"])
census_w_holc["loans_2017"] = census_w_holc["loans_2017"].fillna(value=0)
#find maximum latitude of income map
# (expression kept for its notebook display of [minx, miny, maxx, maxy]; the
# literals below were read off this output)
census_to_counts_map["geometry"].total_bounds
chi_1940['latitude_centroids'] = chi_1940["geometry"].centroid.y
# Clip the 1940 map to the latitude span of the census-tract map so the two
# charts cover the same area.
chi_1940_map = chi_1940[chi_1940['latitude_centroids'] < 42.023924]
chi_1940_map = chi_1940_map[chi_1940_map['latitude_centroids'] > 41.644286]
# Expand letter grades to the full HOLC legend labels for display.
chi_1940_map.loc[chi_1940_map['holc_grade'] == "A", 'holc_grade'] = "A: Most Desirable"
chi_1940_map.loc[chi_1940_map['holc_grade'] == "B", 'holc_grade'] = "B: Still Desirable"
chi_1940_map.loc[chi_1940_map['holc_grade'] == "C", 'holc_grade'] = "C: Declining"
chi_1940_map.loc[chi_1940_map['holc_grade'] == "D", 'holc_grade'] = "D: Hazardous"
# Serialize for Altair.
redline_map_json = json.loads(chi_1940_map.to_json())
redline_data = alt.Data(values=redline_map_json['features'])
# Base layer: every census tract in light gray, as context under the HOLC
# polygons (the empty encode() means no data-driven channels).
census_base = alt.Chart(count_data).mark_geoshape(
    fill = 'lightgray',
    stroke='lightgray',
    strokeWidth=1
).encode(
)
# Choropleth of the 1940 HOLC grades (ordinal A-D), slightly transparent so
# the base layer shows through.
redlining_chloro = alt.Chart(redline_data).mark_geoshape(opacity=.9
).encode(
    alt.Color('properties.holc_grade',
              type='ordinal',
              scale=alt.Scale(scheme="yelloworangered"),
              legend =alt.Legend(title = "HOLC Grade"))).properties(
    title={"text":["HOLC Grades in 1940 Chicago"], "fontSize" : 12},
    height = 500,
    width = 500)
# Layer the HOLC grades on top of the gray tract base map.
redlining_map = census_base + redlining_chloro
# Choropleth of 2017 loans per 100 residents by census tract.
loan_chloro = alt.Chart(count_data).mark_geoshape(
).encode(
    alt.Color('properties.loans_per_100_ppl_2017',
              type='quantitative',
              scale=alt.Scale(scheme="tealblues"),
              legend =alt.Legend(title = "Loans per 100 people", clipHeight=4))).properties(
    title= {"text":["Loans Per 100 People in 2017"], "fontSize" : 12},
    height = 500, width = 500)
# Side-by-side comparison: 1940 HOLC map vs. 2017 lending map, with a shared
# title/subtitle and a bottom-oriented legend.
alt.hconcat(redlining_map, loan_chloro).properties(
    title={"text" : "Lending Patterns have Changed Tremendously over the Last 80 years",
           'subtitle' : ["Many areas redlined in the past are now growing at the fastest rates in Chicago", "Source: HMDA 2017 Data"],
           'subtitleFont': 'Gotham',
           'subtitleFontSize' : 14}).configure_legend(
    #strokeColor='gray',
    #fillColor='#EEEEEE',
    padding=6,
    cornerRadius=10,
    orient='bottom',
    columns = 0
)
## I want this map to look like the actual HOLC map!
# There are a number of ways I could show this first map
# fix the legends
# put the maps closer to each other
# make the redlining map more transparent